This version
Current version: 1
Date report generated: 26 October, 2020
Report prepared for: Daniel Desaulniers, Cathy Cummings-Lorbetskie
Purpose of report:
Previous revisions
N/A
This report is meant to help explore DESeq2 results and was generated using RMarkdown. This section contains the code for setting up the rest of the report.
## knitrBoostrap and device chunk options
library('knitr')
# Set so that long lines in R will be wrapped:
opts_chunk$set(bootstrap.show.code = FALSE, bootstrap.panel = TRUE, cache = TRUE) # tidy.opts=list(width.cutoff=60), tidy=TRUE, crop = NULL
# Set this variable to FALSE if you only want to run the plotting functions by reloading the last RData file.
flag=TRUE # To eveluate analysis chunks
if (flag==FALSE) { load("Partial_analysis.RData") }
# Set this variable to TRUE if you would like to embed the files directly into the HTML for portability. This slows down page responsiveness drastically, since the files are generally quite large.
embedFiles=FALSE
# Se this variable to be TRUE if you want to have separate plots of top genes as defined in the R-ODAF template
R_ODAF_plots=FALSE
message("If this text is visible by default, this report was produced to test plotting functions and should be used exclusively for testing and development.")
#### Load Libraries
## Record start time
startTime <- Sys.time()
## Set libPaths
.libPaths(c("/home/mpiage/R/x86_64-pc-linux-gnu-library/3.6",.libPaths()))
## Bioconductor
library('DESeq2')
library('edgeR')
#library('org.Ca.eg.db') # MOUSE: library('org.Mm.eg.db') # HUMAN: library('org.Hs.eg.db') # RAT: library('org.Rn.eg.db') # HAMSTER: library('org.Ca.eg.db')
library('AnnotationHub')
OrgDb.Ma <- query(AnnotationHub(), c("OrgDb","Mesocricetus auratus"))[[1]] # Golden Hamster
library('enrichplot')
library('rWikiPathways')
library('BiocParallel')
## CRAN
library('ggplot2')
theme_set(theme_bw())
library('knitr')
library('RColorBrewer')
library('viridis')
library('pheatmap')
library('DT')
library('sessioninfo')
library('plotly')
## Other packages
packages <- c("RMariaDB",
"clusterProfiler",
"biomaRt",
"regionReport",
"magrittr",
"vsn",
"lattice",
"pheatmap",
"dplyr",
"data.table",
"forcats",
"tidytext",
"openxlsx",
"kableExtra"
)
invisible(suppressPackageStartupMessages(lapply(packages, function(x)require(x, character.only = T, quietly = T))))
options(java.parameters = "-Xmx10000m")
The code above (not shown by default) loads the relevant R packages required for analysis.
###################################################################################
###################################################################################
# PARAMETERS TO SET MANUALLY
# Set file locations
sampledir <- "/home/mpiage/data/2020_hamster_RNAseq_Desaulniers/"
setwd(sampledir)
outputdir <- paste(sampledir, "./DEG_output/", sep="")
if(!dir.exists(outputdir)) {dir.create(outputdir)}
# Names of files to load
SampleDataFile <- "genes.data.tsv" #This tab delimited file contains the merged RSEM.genes.results files
SampleKeyFile <- "metadata.txt" #This tab delimited file contains at least 2 columns: NAME (sample names identical to the column names of sampleData) and Compound (needs to identify to which group the sample belongs -> ExperimentalGroup & ControlGroup)
ContrastsFile <- "contrasts.txt" #This tab delimited file contains one row per contrast, with the control for comparison in the right column and the comparison in the left column
# Specify which groups need to be compared
contrasts <- read.delim(ContrastsFile, stringsAsFactors=FALSE, sep="\t", header=FALSE, quote="\"")
short_contrast_names <- paste(contrasts$V1,"v.",contrasts$V2) # Customize these for your experiment... Must be short enough to fit as Excel tab titles.
intgroup <- DESIGN <- "group" #Column name which defines the groups to be compared
nBestFeatures <- 20 # The number of best features to make plots of their counts
nBest <- 100 # Number of features to include in table and limiting PCA/clustering analysis
nHeatmap <- 50 # Number of most variable genes for heatmap
nHeatmapDEGs <- 50 # Number of DEGs for heatmap
# Set analysis ID. This ID will be used as prefix for the output files
analysisID <-"2020_"
# Specify used platform/technology for data generation:
Platform <- "RNA-Seq" # Specify "RNA-Seq" or "TempO-seq"
# Misc parameters
digits = 2 # For rounding numbers
The code above (not shown by default) specifies user preferences and data locations.
The experimental comparisons of interest are as follows: 1 5adC v. 0 Naive, 5 5adC v. 0 Naive, 1 5adC v. 0 Vehicle, 5 5adC v. 0 Vehicle, 0 Vehicle v. 0 Naive.
# Load input files
setwd(sampledir)
sampleData <- read.delim(SampleDataFile,
sep="\t",
stringsAsFactors=FALSE,
header=TRUE,
quote="\"",
row.names=1,
check.names=FALSE)
DESeqDesign <- read.delim(SampleKeyFile,
stringsAsFactors=FALSE,
sep="\t",
header=TRUE,
quote="\"",
row.names="name") # Pick column that is used in ID
NORM_TYPE<-paste0(analysisID, "_DESeq2_", Platform)
plotdir <- paste(outputdir, "/plots/", sep="")
if(!dir.exists(plotdir)) {dir.create(plotdir)}
barplot.dir<- paste(plotdir, "/barplot_genes/", sep="")
if(!dir.exists(barplot.dir)) {dir.create(barplot.dir)}
The code above (not shown by default) loads user-provided meta data (i.e., information about your experiment, also known as colData, or, column data). This also imports the countx matrix (i.e., a table of observed counts in which each sample is a column and genes are rows).
##########
# DESeq2 #
##########
print(NORM_TYPE) # Name of experiment
## [1] "2020__DESeq2_RNA-Seq"
# First data clean-up: replace NA & remove samples with total readcount < threshold
threshold = 1000000
initialSampleDataCount <- ncol(sampleData)
sampleData[ is.na(sampleData) ] <- 0
sampleData <- sampleData[,(colSums(sampleData) > threshold)] # 1 million reads required per sample
filteredSampleDataCount <- ncol(sampleData)
# Sometimes extra cleanup may be needed
# colnames(sampleData) <- gsub(pattern="^0", replacement="", x=colnames(sampleData))
samples_before <- nrow(DESeqDesign)
DESeqDesign$original_names <- rownames(DESeqDesign)
metadata_in_sampledata <- all(rownames(DESeqDesign) %in% colnames(sampleData)) # Sanity check: each sample name (row) in the metadata should have a corresponding column in the count data
sampledata_in_metadata <- all(colnames(sampleData) %in% rownames(DESeqDesign)) # Sanity check: each sample name (row) in the metadata should have a corresponding column in the count data
removed <- colnames(sampleData[which(!colnames(sampleData) %in% rownames(DESeqDesign))])
DESeqDesign <- DESeqDesign[(colnames(sampleData)),] # Reorder the metadata table to correspond to the order of columns in the count data
DESeqDesign <- na.omit(DESeqDesign)
DESeqDesign <- DESeqDesign[ DESeqDesign$treatment != "human reference RNA",] # Remove samples manually
sampleData <- sampleData[,(rownames(DESeqDesign))]
samples_after <- nrow(DESeqDesign)
# For TempO-Seq: Limit sampleData matrix to genes in the assay.
if (Platform=="TempO-Seq") {
biospyder <- read.delim("~/dbs/biospyder/191113_Human_S1500_Surrogate_2.0_Manifest.csv", # Assay manifest...
stringsAsFactors=FALSE,
sep="\t",
header=TRUE,
quote="\"")
sampleData <- sampleData[row.names(sampleData) %in% biospyder$ENSEMBL_GENE_ID,]
}
head(rownames(DESeqDesign))
## [1] "1_um_5adC_rep_1" "1_um_5adC_rep_2" "1_um_5adC_rep_3" "1_um_5adC_rep_4"
## [5] "5_um_5adC_rep_1" "5_um_5adC_rep_2"
head(colnames(sampleData)) # Output should match
## [1] "1_um_5adC_rep_1" "1_um_5adC_rep_2" "1_um_5adC_rep_3" "1_um_5adC_rep_4"
## [5] "5_um_5adC_rep_1" "5_um_5adC_rep_2"
setwd(outputdir)
if (file.exists("dds.Rdata")) {
print(paste("Already found DESeq2 object from previous run; loading from disk."))
load("./dds.Rdata")
if (!identical(as.data.frame(round(counts(dds))),
round(sampleData),0)) {
print("Not identical")
}
} else {
dds <- DESeqDataSetFromMatrix(countData = round(sampleData),
colData = as.data.frame(DESeqDesign),
design = ~group )
dds <- dds[, rownames(DESeqDesign)]
dds <- dds[rowSums(counts(dds)) > 1]
dds <- DESeq(dds, parallel = TRUE, BPPARAM=MulticoreParam(39))
save(dds, file="./dds.Rdata")
}
## [1] "Already found DESeq2 object from previous run; loading from disk."
## [1] "Not identical"
# ### REMOVE if done above from scratch
# keep <- grep(colData(dds)$chemical, pattern="cells", invert=T, ignore.case = T)
# dds <- dds[,keep]
# dds <- dds[row.names(dds) %in% biospyder$ENSEMBL_GENE_ID,]
# Another sanity check to make sure the object looks correct
resultsNames(dds)
## [1] "Intercept" "group_0.Vehicle_vs_0.Naive"
## [3] "group_1.5adC_vs_0.Naive" "group_5.5adC_vs_0.Naive"
head(colData(dds))
## DataFrame with 6 rows and 6 columns
## dose treatment replicate group original_names
## <integer> <character> <character> <factor> <character>
## 1_um_5adC_rep_1 1 5adC rep1 1 5adC 1_um_5adC_rep_1
## 1_um_5adC_rep_2 1 5adC rep2 1 5adC 1_um_5adC_rep_2
## 1_um_5adC_rep_3 1 5adC rep3 1 5adC 1_um_5adC_rep_3
## 1_um_5adC_rep_4 1 5adC rep4 1 5adC 1_um_5adC_rep_4
## 5_um_5adC_rep_1 5 5adC rep1 5 5adC 5_um_5adC_rep_1
## 5_um_5adC_rep_2 5 5adC rep2 5 5adC 5_um_5adC_rep_2
## sizeFactor
## <numeric>
## 1_um_5adC_rep_1 0.851980611293044
## 1_um_5adC_rep_2 0.848279595118718
## 1_um_5adC_rep_3 0.900285964389282
## 1_um_5adC_rep_4 1.07172382023083
## 5_um_5adC_rep_1 1.38338182362141
## 5_um_5adC_rep_2 1.10690151339656
head(assay(dds))
## 1_um_5adC_rep_1 1_um_5adC_rep_2 1_um_5adC_rep_3
## ENSMAUG00000000002 1001 1337 1336
## ENSMAUG00000000004 5758 7318 7258
## ENSMAUG00000000005 5 2 2
## ENSMAUG00000000006 14640 17361 17928
## ENSMAUG00000000008 0 0 0
## ENSMAUG00000000010 21753 26360 28254
## 1_um_5adC_rep_4 5_um_5adC_rep_1 5_um_5adC_rep_2
## ENSMAUG00000000002 1612 2412 1762
## ENSMAUG00000000004 8802 12752 10116
## ENSMAUG00000000005 2 0 2
## ENSMAUG00000000006 21259 27569 25418
## ENSMAUG00000000008 0 0 0
## ENSMAUG00000000010 32936 42322 38865
## 5_um_5adC_rep_3 5_um_5adC_rep_4 Naive_rep1 Naive_rep2
## ENSMAUG00000000002 1540 1465 1368 1490
## ENSMAUG00000000004 8431 7952 7194 7747
## ENSMAUG00000000005 0 3 0 1
## ENSMAUG00000000006 19777 19002 18877 18467
## ENSMAUG00000000008 0 0 1 0
## ENSMAUG00000000010 31088 28897 28091 29058
## Naive_rep3 Naive_rep4 Vehicle_rep1 Vehicle_rep2 Vehicle_rep3
## ENSMAUG00000000002 1239 1035 1998 1970 9585
## ENSMAUG00000000004 5852 5643 11140 10190 24510
## ENSMAUG00000000005 0 0 2 1 1
## ENSMAUG00000000006 15495 15016 27658 22377 28300
## ENSMAUG00000000008 1 0 0 0 0
## ENSMAUG00000000010 23597 24018 41923 34782 45531
## Vehicle_rep4
## ENSMAUG00000000002 2263
## ENSMAUG00000000004 13687
## ENSMAUG00000000005 1
## ENSMAUG00000000006 31726
## ENSMAUG00000000008 0
## ENSMAUG00000000010 48759
head(rowRanges(dds))
## GRangesList object of length 6:
## $ENSMAUG00000000002
## GRanges object with 0 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
##
## $ENSMAUG00000000004
## GRanges object with 0 ranges and 0 metadata columns:
## seqnames ranges strand
##
## $ENSMAUG00000000005
## GRanges object with 0 ranges and 0 metadata columns:
## seqnames ranges strand
##
## ...
## <3 more elements>
## -------
## seqinfo: no sequences
str(counts(dds))
## int [1:15971, 1:16] 1001 5758 5 14640 0 21753 0 0 0 93694 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:15971] "ENSMAUG00000000002" "ENSMAUG00000000004" "ENSMAUG00000000005" "ENSMAUG00000000006" ...
## ..$ : chr [1:16] "1_um_5adC_rep_1" "1_um_5adC_rep_2" "1_um_5adC_rep_3" "1_um_5adC_rep_4" ...
#Set parameters according to platform
if (Platform=="RNA-Seq"){
MinCount<- 1
alpha <- pAdjValue <- 0.05 # Relaxed from 0.01
linear_fc_filter = 1.5
} else if (Platform=="TempO-Seq") {
MinCount<- 0.5
alpha <- pAdjValue<- 0.05
linear_fc_filter = 1.5
} else { print("Platform/technology not recognized") }
# Make regularized log object for later plotting
#rld <- tryCatch(rlog(dds), error = function(e) { rlog(dds, fitType = 'mean') })
# Use vst for hundreds of samples!
rld <- vst(dds) # Should this be blind?
The code above (not shown by default) uses DESeq2 1.24.0 to test for differentially abundant genes within the RNA-Seq data.
Prior to running DESeq2, the data was filtered to remove samples that do not have 10^{6} reads per sample.
The user-provided metadata initially included 18 samples.
The count matrix initially included 19 samples (including any reference material samples). After removing samples with less than 10^{6} reads, 19 samples were left. It is TRUE that all the samples provided in the metadata table were also identified in the count matrix. It is FALSE that all the samples in the count matrix were also identified in the metadata table.
Following the removal of other samples (i.e., reference RNA, etc.), there were 16 samples remaining in the experiment. The samples excluded from analysis were named: Undetermined.
This table shows the final list of samples that were used in the data analysis (as well as corresponding sample information as it was communicated to the Genomics Laboratory).
knitr::kable(DESeqDesign,
row.names = F,
caption="User-provided information about samples and experimental conditions") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
scroll_box(width = "720px")
| dose | treatment | replicate | group | original_names |
|---|---|---|---|---|
| 1 | 5adC | rep1 | 1 5adC | 1_um_5adC_rep_1 |
| 1 | 5adC | rep2 | 1 5adC | 1_um_5adC_rep_2 |
| 1 | 5adC | rep3 | 1 5adC | 1_um_5adC_rep_3 |
| 1 | 5adC | rep4 | 1 5adC | 1_um_5adC_rep_4 |
| 5 | 5adC | rep1 | 5 5adC | 5_um_5adC_rep_1 |
| 5 | 5adC | rep2 | 5 5adC | 5_um_5adC_rep_2 |
| 5 | 5adC | rep2 | 5 5adC | 5_um_5adC_rep_3 |
| 5 | 5adC | rep4 | 5 5adC | 5_um_5adC_rep_4 |
| 0 | Naive | rep1 | 0 Naive | Naive_rep1 |
| 0 | Naive | rep2 | 0 Naive | Naive_rep2 |
| 0 | Naive | rep3 | 0 Naive | Naive_rep3 |
| 0 | Naive | rep4 | 0 Naive | Naive_rep4 |
| 0 | Vehicle | rep1 | 0 Vehicle | Vehicle_rep1 |
| 0 | Vehicle | rep2 | 0 Vehicle | Vehicle_rep2 |
| 0 | Vehicle | rep3 | 0 Vehicle | Vehicle_rep3 |
| 0 | Vehicle | rep4 | 0 Vehicle | Vehicle_rep4 |
###################################################################################
#DEFINE FUNCTIONS
###################################################################################
plot.barplots<-function(samples,b) {
color <- NULL
for (h in 1:ncol(norm_data)){
if (substring(colnames(norm_data)[h], 1, 3) == substring(condition2, 1, 3)) { color <- c(color, "red3") } else { color <- c(color, "darkgrey")}
}
fileNamePlot <- paste0(b, row.names(samples)[b], ".png")
pseudoTitle <- paste0(row.names(samples)[b], "_pAdj:", samples[b,"padj"])
png(file=paste(fileNamePlot, sep="/"), width=1200, height=700, pointsize=20)
par(mar=c(8,4,3,1))
barplot(as.numeric(norm_data[row.names(samples)[b],]), las=2, col=color, main=pseudoTitle, cex.names=0.5, cex.axis=0.8, names.arg=colnames(norm_data))
dev.off()
} #plot.barplots function done
###################################################################################
draw.barplots<-function(samples, top_bottom, NUM){
if (nrow(samples) == 0) {
#print("no genes to plot")
} else {
if (top_bottom == "top") {
#print(paste0("drawing Top ", NUM, " plots"))
if (nrow(samples) <= NUM) {
for (b in 1:nrow(samples)) {plot.barplots(samples,b)}
}
if (nrow(samples) > NUM) {
for (b in 1:NUM) {plot.barplots(samples,b)}
}
}
if (top_bottom == "bottom") {
#print(paste0("drawing Bottom", NUM, " plots"))
if (nrow(samples) <= NUM) {
for (b in 1:nrow(samples)) {plot.barplots(samples,b)}
}
if (nrow(samples) > NUM) {
for (b in ((nrow(samples)-NUM+1):nrow(samples))) {plot.barplots(DEsamples,b)}
}
}}
} #draw.barplots function done
###################################################################################
###################################################################################
The code above (not shown by default) loads in plotting functions specific to the Omics Data Analysis Frameworks for Regulatory application (R-ODAF) template. More information on the R-ODAF framework is available here.
cooks <- FALSE # Normally set Cook's cutoff to false
resList <- list()
for (x in 1:nrow(contrasts)) { ## for all comparisons to be done
condition1 <- contrasts[x,2]
condition2 <- contrasts[x,1]
print(paste(condition2, " vs ", condition1, ":", NORM_TYPE))
DE_Design <- matrix(data=NA, ncol=2)
DE_Design <- DESeqDesign[c(grep(condition1,DESeqDesign[,DESIGN]), grep(condition2,DESeqDesign[,DESIGN])),]
samples <- sampleData[, rownames(DE_Design) ]
colnames(samples) <- NULL
###########
print(paste0("Filtering genes: 75% of at least 1 group need to be above ", MinCount, " CPM"))
print("AND")
print("Detecting spurious spikes: Max-Median > Sum/(Rep+1)" )
SampPerGroup<-table(DE_Design[,DESIGN])
idx<-FlagSpike<-NameRows<-NULL
Counts<-counts(dds, normalized=TRUE)
CPMdds<-cpm(counts(dds, normalized=TRUE))
for (gene in 1:nrow(dds)) {
GroupsPass <- checkSpike<-NULL
for (group in 1:length(SampPerGroup)) { #test if group passes
sampleCols<-grep(dimnames(SampPerGroup)[[1]][group],DE_Design[,DESIGN])
Check<-sum(CPMdds[gene,sampleCols] >= MinCount)>= 0.75*SampPerGroup[group]
GroupsPass<-c(GroupsPass, Check)
if (Check == FALSE) {checkSpike<- c(checkSpike, Check)} else {
checkSpike<-c(checkSpike, ((max(Counts[gene,sampleCols])-median(Counts[gene,sampleCols])) >=
(sum(Counts[gene,sampleCols])/(SampPerGroup[group]+1))))
}
}
idx <- c(idx, as.logical(sum(GroupsPass)))
if (sum(checkSpike) >=1) {
FlagSpike<-rbind(FlagSpike, Counts[gene,])
NameRows<<-c(NameRows, row.names(Counts)[gene])
row.names(FlagSpike)<-NameRows
}
}
print("Obtaining the DESeq2 results")
currentContrast <- c(DESIGN, condition2, condition1)
res <- results(dds[idx],
parallel = TRUE, BPPARAM=MulticoreParam(39),
contrast=currentContrast,
pAdjustMethod= 'fdr',
cooksCutoff=cooks) # Cooks cutoff disabled - manually inspect.
res <- lfcShrink(dds=dds[idx],
contrast=currentContrast,
res=res,
type="ashr")
#Make new directory for the ODAF-specific files
ODAFdir <- paste(outputdir, "/R-ODAF/", sep="")
if(!dir.exists(ODAFdir)) {dir.create(ODAFdir)}
setwd(ODAFdir)
FileName <- paste(NORM_TYPE, condition2,"vs",condition1, "FDR", pAdjValue, sep="_")
#Save output tables
norm_data <<- counts(dds[idx],normalized=TRUE)
colnames(norm_data) <- colData(dds)[,DESIGN]
write.table(norm_data,file=paste0(FileName, "_Norm_Data.txt"), sep="\t", quote=FALSE)
write.table(FlagSpike,file=paste0(FileName, "_FlaggedSpikes.txt"), sep="\t", quote=FALSE)
DEsamples <<- subset(res,res$padj < pAdjValue)
write.table(DEsamples,file=paste0(FileName,"_DEG_table.txt"), sep="\t", quote=FALSE)
DEspikes <<- DEsamples[rownames(DEsamples)%in%NameRows,]
write.table(DEspikes,file=paste0(FileName,"_DEspikes_table.txt"), sep="\t",quote=FALSE)
resList[[x]] <- res
if (R_ODAF_plots==TRUE) {
print("creating Read count Plots")
# top DEGs
plotdir<- paste(outputdir, "/plots/", sep="")
if(!dir.exists(plotdir)) {dir.create(plotdir)}
barplot.dir<- paste(plotdir, "/barplot_genes/", sep="")
if(!dir.exists(barplot.dir)) {dir.create(barplot.dir)}
TOPbarplot.dir<- paste(barplot.dir, "Top_DEGs/", sep="")
if(!dir.exists(TOPbarplot.dir)) {dir.create(TOPbarplot.dir)}
setwd(TOPbarplot.dir)
draw.barplots(DEsamples, "top", 20) #(DEsamples, top_bottom, NUM)
print("Top 20 DEG plots done")
# Spurious spikes
SPIKEbarplot.dir<- paste(barplot.dir, "DE_Spurious_spikes/", sep="")
if(!dir.exists(SPIKEbarplot.dir)) {dir.create(SPIKEbarplot.dir)}
setwd(SPIKEbarplot.dir)
draw.barplots(DEspikes, "top", nrow(DEspikes)) #(DEsamples, top_bottom, NUM)
print("All DE_Spurious_spike plots done")
}
}
## [1] "1 5adC vs 0 Naive : 2020__DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
## [1] "5 5adC vs 0 Naive : 2020__DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
## [1] "1 5adC vs 0 Vehicle : 2020__DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
## [1] "5 5adC vs 0 Vehicle : 2020__DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
## [1] "0 Vehicle vs 0 Naive : 2020__DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
The code above (not shown by default) runs the R-ODAF spurious spike detection and outputs the DESeq Results objects as a list for each contrast. As specified by the R-ODAF guidelines, 75% of at least 1 group need to be above 1 CPM and spurious spikes were removed in which Max-Median > Sum/(Rep+1).
The log2FoldChange shrinkage procedure used was ashr. An alpha of 0.1 was used to extract raw results, which are reported as the Wald test p-value. To account for multiple testing, fdr adjusted p-values are reported. Cook’s cutoff was set to FALSE in this analysis.
#listEnsembl()
#listMarts()
#listDatasets(useMart('ensembl'))
ensembl <- useMart("ensembl", dataset = "mauratus_gene_ensembl", host = "useast.ensembl.org") # Mouse: "mmusculus_gene_ensembl" # Human: "hsapiens_gene_ensembl" # Golden hamster: mauratus_gene_ensembl # Rat: rnorvegicus_gene_ensembl
#listAttributes(ensembl)
#listFilters(ensembl)
genes <- row.names(resList[[1]])
#genes_all <- unique(row.names(assay(dds)))
setwd(outputdir)
if (file.exists("id_table.Rdata")) {
print(paste("Already found ID table; skipping biomaRt query and loading from disk."))
load("./id_table.Rdata")
id_table <- id_table_entrez[,1:3]
} else {
id_table_entrez <- getBM(filters="ensembl_gene_id",
attributes= c("ensembl_gene_id",
"external_gene_name", # mgi_symbol for Mouse
"description",
"entrezgene_id"), # "refseq_mrna" or "refseq_peptide" may be of interest, but can't join with gene tables.
values=genes,
mart=ensembl)
save(id_table_entrez, file="./id_table.Rdata")
id_table <- id_table_entrez[,1:3]
}
## [1] "Already found ID table; skipping biomaRt query and loading from disk."
genes_entrezid <- dplyr::left_join(data.frame(ensembl_gene_id=genes), id_table_entrez, by="ensembl_gene_id")
sigtabList <- list()
alltablList <- list()
for (i in 1:length(resList)) {
print(i)
sigTab <- resList[[i]]
# Add taxonomy
if (nrow(sigTab) == 0) {
next
} else {
sigTab <- cbind(ensembl_gene_id=row.names(resList[[i]]),
as(sigTab, "data.frame"),
contrast = gsub(pattern = paste0("log2.*", DESIGN, "\ "),
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2]))
sigTab <- dplyr::left_join(sigTab,
id_table,
by="ensembl_gene_id")
sigTab <- dplyr::mutate(sigTab, linearFoldChange=ifelse(log2FoldChange > 0,
2 ^ log2FoldChange,
-1 / (2 ^ log2FoldChange)))
#sigTab <- sigTab[,c(1,9,10,2,3,11,5:8)] # Reorder columns
sigTab <- sigTab[,c(1,8,9,2,3,10,4:7)]
alltablList[[i]] <- sigTab
sigTab <- sigTab[!is.na(sigTab$padj) & sigTab$padj < alpha & abs(sigTab$linearFoldChange) > linear_fc_filter, ] ## FILTERS!
sigtabList[[i]] <- sigTab %>% dplyr::distinct()
}
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# Write dataframe of all results
sigtabList <- sigtabList[!sapply(sigtabList, is.null)]
significantResults <- do.call(rbind, sigtabList)
significantResults <- dplyr::distinct(significantResults)
allResults <- do.call(rbind, alltablList)
# All Results for Plotting
# Replace NA gene symbols with ensembl ID
allResults$external_gene_name[is.na(allResults$external_gene_name)] <- allResults$ensembl_gene_id[is.na(allResults$external_gene_name)]
# Replace blank gene symbols with ensembl ID
allResults$external_gene_name[allResults$external_gene_name == ""] <- allResults$ensembl_gene_id[allResults$external_gene_name == ""]
allResults$padj[allResults$padj == 0] <- 10^-100 # For plotting purposes!
allResultsOrdered_logFC_filter <- dplyr::filter(allResults, abs(linearFoldChange) > 1.5) %>%
arrange(-abs(linearFoldChange))
allResultsOrdered_logFC_filter <- dplyr::filter(allResultsOrdered_logFC_filter, padj < alpha)
res.df <- allResultsOrdered_logFC_filter
degTable <- significantResults %>%
dplyr::group_by(contrast) %>%
dplyr::count()
lengths <- lapply(resList, nrow)
longest <- which.max(lengths)
summaryTable <- data.frame( ensembl_gene_id=row.names(resList[[longest]]),
baseMean=resList[[longest]]$baseMean )
contrastsInSummary <- vector()
for (i in 1:length(resList)) {
print(i)
n <- resList[[i]]@elementMetadata[[2]][2]
n <- gsub(pattern = paste0("log2\ fold\ change\ \\(MMSE\\):\ ",DESIGN), # Might need to be MLE for some tests
replacement = paste0("log2 Fold Change"),
x = resList[[i]]@elementMetadata[[2]][2])
p <- gsub(pattern = paste0("log2\ fold\ change\ \\(MMSE\\):\ ",DESIGN),
replacement = resList[[i]]@elementMetadata[[2]][6], # Not used?
x = resList[[i]]@elementMetadata[[2]][2])
q <- gsub(pattern = paste0("log2\ fold\ change\ \\(MMSE\\):\ ",DESIGN,"\ "),
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2])
message(n)
message(p)
message(q)
toJoin <- as.data.frame(resList[[i]])
setDT(toJoin, keep.rownames = T)[]
setnames(toJoin, 1, "ensembl_gene_id")
toJoin <- mutate(toJoin, linearFoldChange=ifelse(log2FoldChange > 0,
2 ^ log2FoldChange,
-1 / (2 ^ log2FoldChange)))
toJoin <- toJoin[,c(1:3,7,4:6)]
summaryTable <- dplyr::left_join(summaryTable, dplyr::select(toJoin, !c(baseMean,pvalue,lfcSE)), by="ensembl_gene_id")
names(summaryTable)[[ncol(summaryTable)-2]] <- paste0("log2FoldChange_",i)
names(summaryTable)[[ncol(summaryTable)-1]] <- paste0("linearFoldChange_",i)
names(summaryTable)[[ncol(summaryTable)]] <- paste0("FDR_",i)
contrastsInSummary[i] <- q
print(summary(resList[[i]], pAdjValue))
}
## [1] 1
##
## out of 11554 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 1776, 15%
## LFC < 0 (down) : 1600, 14%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] 2
##
## out of 11554 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 2585, 22%
## LFC < 0 (down) : 2467, 21%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] 3
##
## out of 11554 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 3516, 30%
## LFC < 0 (down) : 3378, 29%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] 4
##
## out of 11554 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 3797, 33%
## LFC < 0 (down) : 3591, 31%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] 5
##
## out of 11554 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 2368, 20%
## LFC < 0 (down) : 2328, 20%
## outliers [1] : 0, 0%
## low counts [2] : 224, 1.9%
## (mean count < 13)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
maxFCs <- allResults %>%
dplyr::group_by(ensembl_gene_id) %>%
dplyr::filter(abs(linearFoldChange) == max(abs(linearFoldChange))) %>%
dplyr::ungroup() %>%
dplyr::select(ensembl_gene_id, linearFoldChange)
minPvals <- allResults %>%
group_by(ensembl_gene_id) %>%
dplyr::filter(padj == min(padj)) %>%
dplyr::ungroup() %>%
dplyr::select(ensembl_gene_id, padj)
summaryTable <- summaryTable %>%
left_join(id_table, by="ensembl_gene_id") %>%
left_join(maxFCs, by="ensembl_gene_id") %>%
left_join(minPvals, by="ensembl_gene_id") %>%
dplyr::rename(maxFoldChange = linearFoldChange,
minFDR_pval = padj
) %>%
mutate(maxFoldChange=abs(maxFoldChange))
numColsToPrepend <- ncol(summaryTable) - 3*length(resList) - 2 # Number of columns per contrast = 3. Subtract two for the baseMean and ensembl_gene_id columns.
colPositionsToPrependSTART <- ncol(summaryTable) - numColsToPrepend + 1
colPositionsOfData <- ncol(summaryTable) - numColsToPrepend
summaryTable <- summaryTable[,c(1,
colPositionsToPrependSTART:ncol(summaryTable),
2:colPositionsOfData)]
CPMddsDF <- data.frame(ensembl_gene_id = row.names(CPMdds), CPMdds, check.names=F)
CPMddsDF <- dplyr::left_join(CPMddsDF, id_table, by="ensembl_gene_id")
numColsToPrepend <- ncol(CPMddsDF) - ncol(CPMdds) - 1
colPositionsToPrependSTART <- ncol(CPMddsDF) - numColsToPrepend + 1
colPositionsOfData <- ncol(CPMddsDF) - numColsToPrepend
CPMddsDF <- CPMddsDF[,c(1,colPositionsToPrependSTART:ncol(CPMddsDF),2:colPositionsOfData)]
The code above (not shown by default) generates tables summarzing the differentially expressed genes (DEGs).
Here is the number of DEGs in each group:
kable(degTable,
caption="Number of differentially expressed genes across each contrast") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| contrast | n |
|---|---|
| 1 5adC vs 0 Naive | 693 |
| 5 5adC vs 0 Naive | 1195 |
| 1 5adC vs 0 Vehicle | 1292 |
| 5 5adC vs 0 Vehicle | 1786 |
| 0 Vehicle vs 0 Naive | 212 |
#######################################
### Write results table from DESeq2
#######################################
setwd(outputdir)
write.table(allResults, file=paste0(NORM_TYPE,"-DESeq_output_ALL.txt"), quote=F, sep='\t', col.names=NA)
write.table(significantResults, file=paste0(NORM_TYPE,"-DESeq_output_significant.txt"), quote=F, sep='\t', col.names=NA)
write.table(summaryTable, file=paste0(NORM_TYPE,"-DESeq_output_all_genes.txt"), quote=F, sep='\t', col.names=NA)
write.table(CPMddsDF, file=paste0(NORM_TYPE,"-Per_sample_CPM.txt"), quote=F, sep='\t', col.names=NA)
write.table(Counts, file=paste0(NORM_TYPE,"-Per_sample_normalized_counts.txt"), quote=F, sep='\t', col.names=NA)
The code above (not shown by default) writes text files for each DEG summary type.
setwd(outputdir)
#######################################
### Write results above but in Excel
#######################################
### Global options
options("openxlsx.borderColour" = "#4F80BD")
options("openxlsx.borderStyle" = "thin")
options("openxlsx.maxWidth" = 50)
hs1 <- createStyle(textDecoration = "Bold",
border = "Bottom",
fontColour = "black")
hs2 <- createStyle(textDecoration = "Bold",
border = c("top", "bottom", "left", "right"),
fontColour = "black",
fgFill="#C5D9F1")
### Summary results - one gene per line, columns are contrasts
wb1 <- createWorkbook()
modifyBaseFont(wb1, fontSize = 10, fontName = "Arial Narrow")
addWorksheet(wb1, "DESeq_results_per_gene")
for (j in 1:length(contrastsInSummary)) {
myStartcol=7+((j-1)*3)
myEndcol=9+((j-1)*3)
mergeCells(wb1,
sheet = 1,
cols = myStartcol:myEndcol,
rows = 1)
writeData(
wb1,
sheet = 1,
x = contrastsInSummary[j],
startCol = myStartcol,
startRow = 1)
}
conditionalFormatting(wb1,
sheet = 1,
rows = 1,
cols = 1:ncol(summaryTable),
type = "contains",
rule = "",
style=hs2)
freezePane(wb1, sheet = 1, firstActiveRow = 3, firstActiveCol = 4)
writeDataTable(wb1,
sheet = 1,
startRow = 2,
x = summaryTable,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb1, sheet = 1, cols = 1:6, widths = "auto") # This is hard-coded, so prone to error; will only impact auto adjustment of col widths.
setColWidths(wb1, sheet = 1, cols = 7:ncol(summaryTable), widths = 13) # This is hard-coded, so prone to error; will only impact auto adjustment of col widths.
fname1 <- paste0("1.",NORM_TYPE,"-DESeq_by_gene.xlsx")
saveWorkbook(wb1, fname1, overwrite = TRUE)
### All results in one table
wb2 <- createWorkbook()
modifyBaseFont(wb2, fontSize = 10, fontName = "Arial Narrow")
addWorksheet(wb2, paste0("FDR",pAdjValue,".Linear.FC.",linear_fc_filter))
freezePane(wb2, sheet = 1, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb2,
sheet = 1,
x = significantResults,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb2, sheet = 1, cols = 1:ncol(significantResults), widths = "auto")
addWorksheet(wb2, "DESeq_all_results")
freezePane(wb2, sheet = 2, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb2,
sheet = 2,
x = allResults,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb2, sheet = 2, cols = 1:ncol(allResults), widths = "auto")
saveWorkbook(wb2, paste0("2.",NORM_TYPE,"-DESeq_all.xlsx"), overwrite = TRUE)
### All results with different tabs for each contrast
wb3 <- createWorkbook()
modifyBaseFont(wb3, fontSize = 10, fontName = "Arial Narrow")
short_contrast_names <- stringr::str_trunc(short_contrast_names, 30, side="center")
for (i in 1:length(levels(allResults$contrast))) {
print(i)
dataToWrite <- allResults[allResults$contrast==levels(allResults$contrast)[i],]
addWorksheet(wb3, short_contrast_names[i])
freezePane(wb3, sheet = i, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb3,
sheet = i,
x = dataToWrite,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb3, sheet = i, cols = 1:ncol(dataToWrite), widths = "auto")
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
saveWorkbook(wb3, paste0("3.",NORM_TYPE,"-DESeq_by_contrast.xlsx"), overwrite = TRUE)
### CPM
wb4 <- createWorkbook()
modifyBaseFont(wb4, fontSize = 10, fontName = "Arial Narrow")
addWorksheet(wb4, "Counts per million")
freezePane(wb4, sheet = 1, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb4,
sheet = 1,
x = as.data.frame(CPMddsDF),
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb4, sheet = 1, cols = 1:ncol(CPMddsDF), widths = "auto")
saveWorkbook(wb4, paste0("4.",NORM_TYPE,"-CPM.xlsx"), overwrite = TRUE)
The code above (not shown by default) writes Excel workbooks and text files of DEG lists.
These files should be provided to you as a separate zip file.
setwd(outputdir)
xfun::embed_files(list.files(".", "[.](xlsx)$"), name = "RNASeq_Spreadsheets.zip")
setwd(outputdir)
xfun::embed_files(list.files(".", "[.](txt)$"), name = "RNASeq_Text_Files.zip")
setwd(outputdir)
xfun::embed_dir(ODAFdir, name = "RNASeq_R-ODAF_text_files.zip")
# Save RData here if flag is TRUE.
setwd(outputdir)
save.image(file = "Partial_analysis.RData")
# Add a flag to the start of the script with if statements in all code chunks to check if the flag is set.
# If flag is TRUE, run analysis; if flag is FALSE, SKIP analysis and continue here (just to modify plotting output).
## Perform PCA analysis and make plot
plotPCA(rld, intgroup = intgroup, ntop=nrow(assay(rld)))
## Get percent of variance explained
data_pca <- plotPCA(rld, intgroup = c("dose","group","treatment"), ntop=nrow(assay(rld)), returnData = TRUE)
percentVar <- round(100 * attr(data_pca, "percentVar"))
# PCAplot <- plotPCA(rld, intgroup = c("dose","group","treatment"), ntop=nrow(assay(rld)), returnData = TRUE)
# ggplot(PCAplot, aes(PC1, PC2, color=dose)) +
# geom_point(size=1) +
# xlab(paste0("PC1: ",percentVar[1],"% variance")) +
# ylab(paste0("PC2: ",percentVar[2],"% variance")) +
# coord_fixed()
This plot shows the first two principal components that explain the variability in the data using the regularized log count data. If you are unfamiliar with principal component analysis, you might want to check the Wikipedia entry or this interactive explanation. In this case, the first and second principal component explain 60 and 16 percent of the variance respectively.
rld_degs <- rld[row.names(assay(rld)) %in% significantResults$ensembl_gene_id,]
## Perform PCA analysis and make plot
plotPCA(rld_degs, intgroup = c("dose","group","treatment"), ntop=nrow(rld_degs)) #+ theme(legend.position = "none")
PCAplotDEGs <- plotPCA(rld_degs, intgroup = c("dose","group","treatment"), ntop=nrow(assay(rld_degs)), returnData = TRUE)
# ggplot(PCAplotDEGs, aes(PC1, PC2, color=dose)) +
# geom_point(size=1) +
# xlab(paste0("PC1: ",percentVar[1],"% variance")) +
# ylab(paste0("PC2: ",percentVar[2],"% variance")) +
# facet_wrap(~group) +
# coord_fixed()
## Get percent of variance explained
data_pcaDEGs <- plotPCA(rld_degs, intgroup = intgroup, returnData = TRUE, ntop=nrow(rld_degs))
percentVarDEGs <- round(100 * attr(data_pcaDEGs, "percentVar"))
This plot shows the principal components analysis limited to all DEGs. In this case, the first and second principal component explain 78 and 12 percent of the variance respectively.
# Run this code only once for both the PCA and clustering analysis
rv = rowVars(assay(rld))
select = order(rv, decreasing = TRUE)[1:nBest]
rld_top <- rld[select,]
## Perform PCA analysis and make plot
plotPCA(rld_top, intgroup = intgroup, ntop=nrow(rld_top))
## Get percent of variance explained
data_pcaTop <- plotPCA(rld_top, intgroup = intgroup, returnData = TRUE, ntop=nrow(rld_top))
percentVarTop <- round(100 * attr(data_pcaTop, "percentVar"))
This plot shows the principal components analysis limited to all DEGs. In this case, the first and second principal component explain 88 and 7 percent of the variance respectively.
## Obtain the sample euclidean distances
sampleDists <- dist(t(assay(rld)))
sampleDistMatrix <- as.matrix(sampleDists)
## Add names based on intgroup
rownames(sampleDistMatrix) <- apply(as.data.frame(colData(rld)[, intgroup]), 1,
paste, collapse = ' : ')
colnames(sampleDistMatrix) <- NULL
## Define colors to use for the heatmap
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists, color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
## Limit to the top n genes
# rv = rowVars(assay(rld))
# select = order(rv, decreasing = TRUE)[1:nBest]
## Obtain the sample euclidean distances
# sampleDistsTop <- dist(t(assay(rld)[select,]))
sampleDistsDEGs <- dist(t(assay(rld_degs)))
# sampleDistMatrixTop <- as.matrix(sampleDistsTop)
sampleDistMatrixDEGs <- as.matrix(sampleDistsDEGs)
## Add names based on intgroup
rownames(sampleDistMatrixDEGs) <- apply(as.data.frame(colData(rld)[, intgroup]), 1,
paste, collapse = ' : ')
colnames(sampleDistMatrixDEGs) <- NULL
## Define colors to use for the heatmap
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrixDEGs,
clustering_distance_rows = sampleDistsDEGs,
clustering_distance_cols = sampleDistsDEGs,
color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data of all DEGs. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
## Limit to the top n genes
## Using the select object from PCA code above...
## Obtain the sample euclidean distances
sampleDistsTop <- dist(t(assay(rld)[select,]))
sampleDistMatrixTop <- as.matrix(sampleDistsTop)
## Add names based on intgroup
rownames(sampleDistMatrixTop) <- apply(as.data.frame(colData(rld)[, intgroup]), 1,
paste, collapse = ' : ')
colnames(sampleDistMatrixTop) <- NULL
## Define colors to use for the heatmap
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrixTop,
clustering_distance_rows = sampleDistsTop,
clustering_distance_cols = sampleDistsTop,
color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data of the top 100 most variable genes. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
mat <- assay(rld)[row.names(assay(rld)) %in% significantResults$ensembl_gene_id,]
mat <- mat - rowMeans(mat)
# quantile_breaks <- function(xs, n = 10) {
# breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
# breaks[!duplicated(breaks)]
# }
#
# mat_breaks <- quantile_breaks(mat, n = 11)
# pheatmap(mat,
# annotation_col=heatmap_df,
# show_rownames = FALSE,
# border_color = NA,
# scale="row",
# breaks = mat_breaks)
heatmap_df <- as.data.frame(colData(rld)[,c("dose","group","treatment")]) # Customize!
pheatmap(mat,
annotation_col=heatmap_df,
show_rownames = FALSE,
border_color = NA,
scale="row")
# color = inferno(10)
chems=unique(DESeqDesign$chemical)
# plot_list=list()
for (chem in chems){
samples_for_heatmap <- DESeqDesign[which(DESeqDesign$chemical %in% c(chem, "DMSO 0.1%")),]$original_names
mat_subset <- mat[,samples_for_heatmap]
genes_for_heatmap_subset <- row.names(mat_subset)
genes_for_heatmap_subset <- data.frame(ensembl_gene_id=row.names(mat_subset)) %>%
dplyr::left_join(id_table,
by="ensembl_gene_id") %>%
dplyr::distinct()
genes_for_heatmap_subset$external_gene_name[is.na(genes_for_heatmap_subset$external_gene_name)] <- genes_for_heatmap_subset$ensembl_gene_id[is.na(genes_for_heatmap_subset$external_gene_name)]
genes_for_heatmap$external_gene_name[which(genes_for_heatmap$external_gene_name=="")] <- genes_for_heatmap$ensembl_gene_id[which(genes_for_heatmap$external_gene_name=="")]
row.names(genes_for_heatmap_subset) <- genes_for_heatmap_subset$ensembl_gene_id
# pheatmap(mat_subset,
# #color = rev(brewer.pal(11,"RdBu")), # inferno(10),
# annotation_col=dplyr::select(heatmap_df,-original_names),
# labels_row=genes_for_heatmap_subset[,2],
# border_color = NA,
# scale = "row",
# cutree_rows = 3,
# cutree_cols = 4)
mat_subset_ordered <- mat_subset[,dplyr::arrange(heatmap_df[which(heatmap_df$original_names %in% samples_for_heatmap), ],dose)$original_names]
# No clustering on columns
pheatmap(mat_subset_ordered,
#color = rev(brewer.pal(11,"RdBu")), # inferno(10),
annotation_col=dplyr::select(heatmap_df,-original_names),
cluster_cols = F,
labels_row=genes_for_heatmap_subset[,2],
border_color = NA,
scale = "row",
cutree_rows = 3,
cutree_cols = 4)
# x=pheatmap(mat,
# annotation_col=heatmap_df,
# show_rownames = FALSE,
# border_color = NA,
# scale="row")
# plot_list[[a]] = x[[4]] ##to save each plot into a list. note the [[4]]
# }
# do.call(grid.arrange,plot_list)
}
mat_top <- assay(rld)[row.names(assay(rld)) %in% allResultsOrdered_logFC_filter[1:nHeatmapDEGs,]$ensembl_gene_id,]
mat_top <- mat_top - rowMeans(mat_top)
genes_for_heatmap <- row.names(mat_top)
genes_for_heatmap <- data.frame(ensembl_gene_id=row.names(mat_top)) %>%
dplyr::left_join(id_table,
by="ensembl_gene_id") %>%
dplyr::distinct()
## Warning: Column `ensembl_gene_id` joining factor and character vector, coercing
## into character vector
genes_for_heatmap$external_gene_name[is.na(genes_for_heatmap$external_gene_name)] <- genes_for_heatmap$ensembl_gene_id[is.na(genes_for_heatmap$external_gene_name)]
genes_for_heatmap$external_gene_name[which(genes_for_heatmap$external_gene_name=="")] <- genes_for_heatmap$ensembl_gene_id[which(genes_for_heatmap$external_gene_name=="")]
row.names(genes_for_heatmap) <- genes_for_heatmap$ensembl_gene_id
pheatmap(mat_top,
annotation_col=heatmap_df,
labels_row=genes_for_heatmap[,2],
show_rownames = TRUE,
border_color = NA,
scale="row")
# color = inferno(10)
# sliderInput("nHeatmapShiny", label = "Bandwidth adjustment:",
# min = 10, max = 1000, value = 100, step = 10)
# Interactive heatmap
#renderPlot({
rv = rowVars(assay(rld))
select = order(rv, decreasing = TRUE)[1:nHeatmap]
matRV <- assay(rld)[select,]
matRV <- matRV - rowMeans(matRV)
genes_for_heatmap <- row.names(matRV)
genes_for_heatmap <- data.frame(ensembl_gene_id=row.names(matRV)) %>%
dplyr::left_join(id_table,
by="ensembl_gene_id") %>%
dplyr::distinct()
## Warning: Column `ensembl_gene_id` joining factor and character vector, coercing
## into character vector
genes_for_heatmap$external_gene_name[is.na(genes_for_heatmap$external_gene_name)] <- genes_for_heatmap$ensembl_gene_id[is.na(genes_for_heatmap$external_gene_name)]
genes_for_heatmap$external_gene_name[which(genes_for_heatmap$external_gene_name=="")] <- genes_for_heatmap$ensembl_gene_id[which(genes_for_heatmap$external_gene_name=="")]
row.names(genes_for_heatmap) <- genes_for_heatmap$ensembl_gene_id
pheatmap(matRV,
#color = rev(brewer.pal(11,"RdBu")), # inferno(10),
annotation_col=heatmap_df,
labels_row=genes_for_heatmap[,2],
border_color = NA,
scale = "row",
cutree_rows = 3,
cutree_cols = 4)
#})
This section contains three groups of MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. Each of the groups has a tab for each contrast. The plots show one point per feature. The points are shown in red if the feature has an adjusted p-value less than the cutoff listed in each section, that is, the statistically significant features are shown in red.
This first group of plots shows alpha = 0.1, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
## MA plot with alpha used in DESeq2::results()
for (i in 1:length(resList)) {
contrast = gsub(pattern = "log2.*Time\ ",
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2])
cat("###", contrast, " \n")
DESeq2::plotMA(resList[[i]], alpha = metadata(resList[[i]])$alpha, main = paste('MA plot with alpha =',
metadata(resList[[i]])$alpha,',',contrast))
cat('\n\n')
}
This second group of MA plots uses alpha = 0.05 and can be used against the first MA plot to identify features which have adjusted p-values between 0.05 and 0.1.
## MA plot with alpha = 1/2 of the alpha used in DESeq2::results()
for (i in 1:length(resList)) {
contrast = gsub(pattern = "log2.*Time\ ",
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2])
cat("###", contrast, " \n")
DESeq2::plotMA(resList[[i]], alpha = metadata(resList[[i]])$alpha / 2,
main = paste('MA plot with alpha =', metadata(resList[[i]])$alpha / 2,',',contrast))
cat('\n\n')
}
The third and final set of MA plots uses an alpha such that the top 100 features are shown in the plot.
## MA plot with alpha corresponding to the one that gives the nBest features
for (i in 1:length(resList)) {
nBest.actual <- min(nBest, nrow(head(resList[[i]], n = nBest)))
nBest.alpha <- head( resList[[i]][order(resList[[i]]$pvalue),], n = nBest)$padj[nBest.actual]
contrast = gsub(pattern = "log2.*Time\ ",
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2])
cat("###", contrast, " \n")
DESeq2::plotMA(resList[[i]], alpha = nBest.alpha * 1.00000000000001,
main = paste('MA plot for top', nBest.actual, 'features',',',contrast))
cat('\n\n')
}
## P-value histogram plot
ggplot(allResults, aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
facet_wrap( ~ contrast, ncol = 2)
This plot shows a histogram of the unadjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples. The shape depends on the percent of features that are differentially expressed. For further information on how to interpret a histogram of p-values check David Robinson’s post on this topic.
## P-value distribution summary
summary(allResults$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0000509 0.0332513 0.2040367 0.3422967 1.0000000
This is the numerical summary of the distribution of the p-values.
## Split features by different p-value cutoffs
pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(allResults$pvalue <= x, na.rm = TRUE))
})
pval_table <- do.call(rbind, pval_table)
kable(pval_table, format = 'markdown', align = c('c', 'c'))
| Cut | Count |
|---|---|
| 0.0001 | 15323 |
| 0.0010 | 19054 |
| 0.0100 | 24615 |
| 0.0250 | 27785 |
| 0.0500 | 30679 |
| 0.1000 | 34225 |
| 0.2000 | 38832 |
| 0.3000 | 42164 |
| 0.4000 | 44937 |
| 0.5000 | 47352 |
| 0.6000 | 49584 |
| 0.7000 | 51721 |
| 0.8000 | 53817 |
| 0.9000 | 55886 |
| 1.0000 | 57855 |
This table shows the number of features with p-values less or equal than some commonly used cutoff values.
## Adjusted p-values histogram plot
ggplot(allResults, aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of', elementMetadata(resList[[1]])$description[grep('adjusted', elementMetadata(resList[[1]])$description)])) +
xlab('Adjusted p-values') +
xlim(c(0, 1.0005)) +
facet_wrap( ~ contrast, ncol = 2, scales="free")
## Warning: Removed 224 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing missing values (geom_bar).
This plot shows a histogram of the fdr adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values distribution summary
summary(res.df$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 0.000e+00 0.000e+00 3.633e-04 3.263e-06 2.440e-02
This is the numerical summary of the distribution of the fdr adjusted p-values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(res.df$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
kable(padj_table, format = 'markdown', align = c('c', 'c'))
| Cut | Count |
|---|---|
| 0.0001 | 4462 |
| 0.0010 | 4843 |
| 0.0100 | 5144 |
| 0.0250 | 5188 |
| 0.0500 | 5188 |
| 0.1000 | 5188 |
| 0.2000 | 5188 |
| 0.3000 | 5188 |
| 0.4000 | 5188 |
| 0.5000 | 5188 |
| 0.6000 | 5188 |
| 0.7000 | 5188 |
| 0.8000 | 5188 |
| 0.9000 | 5188 |
| 1.0000 | 5188 |
This table shows the number of features with fdr adjusted p-values less or equal than some commonly used cutoff values.
This table shows the significant DEGs (passing all filtering criteria) ordered by their absolute fold change. Use the search function to find your feature of interest or sort by one of the columns. You can limit to a single contrast if desired.
searchURL <- "http://www.ncbi.nlm.nih.gov/gene/?term="
## Add search url if appropriate
res.df.dt <- res.df
if(!is.null(searchURL)) {
res.df.dt$ensembl_gene_id <- paste0('<a href="',
searchURL,
res.df.dt$ensembl_gene_id,
'" rel="noopener noreferrer" target="_blank">',
res.df.dt$ensembl_gene_id,
'<br/>',
res.df.dt$external_gene_name,
'</a>')
}
for(i in which(colnames(res.df.dt) %in% c('pvalue', 'padj'))) res.df.dt[, i] <- format(res.df.dt[, i], scientific = TRUE)
res.df.dt <- res.df.dt[,!names(res.df.dt) %in% c("description", "external_gene_name")]
DT::datatable(res.df.dt,
options = list(pagingType='full_numbers',
pageLength=20,
scrollX='100%',
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print', 'colvis'),
columnDefs = list(list(visible=FALSE, targets=c(2,4,5)))),
escape = FALSE,
extensions = 'Buttons',
rownames = FALSE,
filter = "top",
colnames = c('Ensembl' = 'ensembl_gene_id')) %>%
DT::formatRound(which(!colnames(res.df.dt) %in% c('pvalue', 'padj', 'Feature', 'contrast', 'description', 'ensembl_gene_id', 'external_gene_name' )), digits)
This section contains plots showing the normalized counts per sample for each group of interest. Only the best 20 features are shown, ranked by their absolute fold change values. The Y axis is on the log10 scale and the feature name is shown in the title of each plot.
plotCounts_gg <- function(i, dds, intgroup) {
data <- plotCounts(dds,
gene=i,
intgroup=intgroup,
returnData = TRUE)
ggplot(data, aes(x = data[,2], y = data[,1])) +
geom_point() +
ylab('Normalized count') +
xlab('Group') +
ggtitle(paste0(i, " ", id_table$external_gene_name[id_table$ensembl_gene_id == i])) +
coord_trans(y = "log10") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
}
genesToPlot <- significantResults %>% arrange(-abs(log2FoldChange))
for(i in head(unique(genesToPlot$ensembl_gene_id), nBestFeatures)) {
print(plotCounts_gg(i, dds = dds, intgroup = intgroup))
}
This section shows genes of interest sorted by those with highest fold-change within each contrast.
#numResults=20
numResults <- nBestFeatures
allResultsOrdered_logFC_filter %>%
group_by(contrast) %>%
top_n(numResults, wt=abs(log2FoldChange)) %>%
ungroup() %>%
mutate(contrast=as.factor(contrast),
external_gene_name=reorder_within(external_gene_name,log2FoldChange, contrast)) %>%
ggplot(aes(x=log2FoldChange,
y=external_gene_name,
color=contrast,
size=-log(padj))) +
geom_point(show.legend = TRUE) +
facet_wrap(~contrast,
scales="free_y",
ncol=4,
labeller = labeller(contrast = label_wrap_gen(10))) +
scale_y_reordered() +
geom_vline(xintercept=0,
linetype="dashed",
color = "black",
size=1) +
ggtitle(paste0("Top ",
numResults,
" genes ranked by fold change (adjusted p-value <",
alpha,
"), grouped by treatment"))
This section shows a volcano plot for each contrast.
Note that scales are set manually for this plot: therefore, there may be data points outside the range shown (see warnings).
ggplot(allResults, aes(x=log2FoldChange, y=-log10(padj))) +
geom_point(size=0.1, alpha=0.5) +
facet_wrap(~contrast, ncol=4) + # , scales="free"
geom_vline(xintercept=c(-1.5,1.5), color="red", alpha=1.0)+
geom_hline(yintercept=-log10(0.05), color="blue", alpha=1.0) +
scale_x_continuous(name = "log2 Fold Change", limits = c(-5,5)) + #
scale_y_continuous(name = "-log10 adjusted p-value", limits = c(0,6)) #
## Warning: Removed 9613 rows containing missing values (geom_point).
###########################################################################################
######## PRODUCE INPUT FOR BMDExpress2 ####################################################
###########################################################################################
# # Create input file...
#lognorm.read.counts <- as.data.frame(counts(dds, normalized=TRUE))
# rld normalized, size factor normalized, rounded to 4 significant figures
bmdexpress <- as.data.frame(assay(rld))
bmdexpress <- cbind(SampleID=row.names(bmdexpress), bmdexpress, stringsAsFactors=F)
bmdexpress <- rbind( Dose=c("#CLASS:DOSE",as.numeric(DESeqDesign$dose)), bmdexpress, stringsAsFactors=F)
write.table(bmdexpress, file = "bmdexpress_input.txt", quote = F, sep = "\t", row.names = F, col.names = T)
# chem_subset <- DESeqDesign %>% dplyr::filter(chemical %in% c("D-8","DMSO 0.1%")) %>% dplyr::select(original_names)
# bmdexpresstest <- bmdexpress %>% dplyr::select(chem_subset$original_names)
# bmdexpresstest <- cbind(SampleID=row.names(bmdexpresstest), bmdexpresstest, stringsAsFactors=F)
# bmdexpresstest <- rbind(Chemical=c("#CLASS:CHEMICAL", as.data.frame(DESeqDesign %>%
# dplyr::filter(chemical %in% c("D-8","DMSO 0.1%")))$chemical),
# bmdexpresstest,
# stringsAsFactors=F)
# bmdexpresstest <- rbind( Dose=c("#CLASS:DOSE", as.data.frame(DESeqDesign %>%
# dplyr::filter(chemical %in% c("D-8","DMSO 0.1%")))$dose),
# bmdexpresstest,
# stringsAsFactors=F)
# write.table(bmdexpresstest, file = "bmdexpress_test_input.txt", quote = F, sep = "\t", row.names = F, col.names = T)
fastbmd <- as.data.frame(assay(dds))
fastbmd <- cbind(SampleID=row.names(fastbmd), fastbmd, stringsAsFactors=F)
fastbmd <- rbind( Chemical=c("#CLASS:CHEMICAL",as.character(DESeqDesign[DESeqDesign$original_names %in% colnames(fastbmd),]$chemical)),
fastbmd,
stringsAsFactors=F)
fastbmd <- rbind( Dose=c("#CLASS:DOSE",format(DESeqDesign[DESeqDesign$original_names %in% colnames(fastbmd),]$dose, scientific=F)),
fastbmd,
stringsAsFactors=F)
write.table(fastbmd, file = "fastbmd_input.txt", quote = F, sep = "\t", row.names = F, col.names = T)
This section performs GO enrichment on all DEGs passing filters.
## Remember that dds had ENSEMBL ids for the genes
# ensemblNames <- gsub("\\..*", "", rownames(dds))
ensemblDEGs <- significantResults$ensembl_gene_id
head(ensemblDEGs)
## [1] "ENSMAUG00000000110" "ENSMAUG00000000241" "ENSMAUG00000000263"
## [4] "ENSMAUG00000000269" "ENSMAUG00000000361" "ENSMAUG00000000409"
DEGs <- dplyr::left_join(data.frame(ensembl_gene_id=ensemblDEGs), id_table_entrez, by="ensembl_gene_id")
head(DEGs)
## ensembl_gene_id external_gene_name
## 1 ENSMAUG00000000110 Fzd6
## 2 ENSMAUG00000000241 Lhx5
## 3 ENSMAUG00000000263 Synpr
## 4 ENSMAUG00000000269 Bnc1
## 5 ENSMAUG00000000361 Timp3
## 6 ENSMAUG00000000409 Ccdc148
## description
## 1 frizzled class receptor 6 [Source:NCBI gene;Acc:101840460]
## 2 LIM homeobox 5 [Source:NCBI gene;Acc:101839931]
## 3 synaptoporin [Source:NCBI gene;Acc:101834378]
## 4 basonuclin 1 [Source:NCBI gene;Acc:101837263]
## 5 TIMP metallopeptidase inhibitor 3 [Source:NCBI gene;Acc:101831793]
## 6 coiled-coil domain containing 148 [Source:NCBI gene;Acc:101828531]
## entrezgene_id
## 1 101840460
## 2 101839931
## 3 101834378
## 4 101837263
## 5 101831793
## 6 101828531
entrezDEGs <- as.character(DEGs$entrezgene_id)
entrezDEGs <- entrezDEGs[!is.na(entrezDEGs)]
#### DEFINE INPUTS
myDEGs <- entrezDEGs
# background <- row.names(assay(dds))
background <- dplyr::left_join(data.frame(ensembl_gene_id=row.names(assay(dds))), id_table_entrez, by="ensembl_gene_id") %>%
dplyr::filter(!is.na(entrezgene_id)) %>%
dplyr::pull(entrezgene_id)
background <- as.character(background)
entrezDEGs <- entrezDEGs[!is.na(entrezDEGs)]
##### TO DO: Add multiple panels for each contrast.
##### Use "all" argument to only run each once..?
## Not all genes have a p-value
table(!is.na(resList[[1]]$padj))
##
## TRUE
## 11554
KeyType <- "ENTREZID" # ENSEMBL? ACCNUM? GID? ENTREZID?
# head(keys(OrgDb.Ma, keytype="ENTREZID"))
enrich_go_all <- enrichGO(gene = myDEGs,
universe = background, # All genes in dataset
OrgDb = OrgDb.Ma, # org.Mm.eg.db # org.Hs.eg.db # org.Rn.eg.db # OrgDb.Ma
keyType = KeyType,
ont = "all",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
# clusterProfiler::dotplot(enrich_go_all, font.size=9, showCategory=10, split="ONTOLOGY") +
# theme(axis.text.y = element_text(angle = 0)) +
# scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) +
# facet_wrap(~ONTOLOGY)
## Perform enrichment analysis for Biological Process (BP)
enrich_go_bp <- enrichGO(gene = myDEGs,
universe = background, # All genes in dataset
OrgDb = OrgDb.Ma, # org.Mm.eg.db # org.Hs.eg.db # org.Rn.eg.db
keyType = KeyType,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
enrich_go_mf <- enrichGO(gene = myDEGs,
universe = background, # All genes in dataset
OrgDb = OrgDb.Ma, # org.Mm.eg.db # org.Hs.eg.db # org.Rn.eg.db
keyType = KeyType,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
enrich_go_cc <- enrichGO(gene = myDEGs,
universe = background, # All genes in dataset
OrgDb = OrgDb.Ma, # org.Mm.eg.db # org.Hs.eg.db # org.Rn.eg.db
keyType = KeyType,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
clusterProfiler::dotplot(enrich_go_bp, font.size=9, showCategory=20) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
clusterProfiler::dotplot(enrich_go_mf, font.size=9) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
clusterProfiler::dotplot(enrich_go_cc, font.size=9) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
Date the report was generated.
## [1] "2020-10-26 20:37:28 UTC"
Wallclock time spent generating the report.
## Time difference of 5.115 mins
R session information.
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl' had status 1
## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.1 (2019-07-05)
## os Debian GNU/Linux 10 (buster)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Etc/UTC
## date 2020-10-26
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
## package * version date lib source
## affy 1.62.0 2019-05-02 [1] Bioconductor
## affyio 1.54.0 2019-05-02 [1] Bioconductor
## annotate 1.62.0 2019-05-02 [1] Bioconductor
## AnnotationDbi * 1.46.1 2019-08-20 [1] Bioconductor
## AnnotationHub * 2.16.1 2019-09-04 [1] Bioconductor
## ashr 2.2-47 2020-02-20 [1] CRAN (R 3.6.1)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 3.6.1)
## backports 1.1.9 2020-08-24 [1] CRAN (R 3.6.1)
## base64enc 0.1-3 2015-07-28 [2] CRAN (R 3.6.1)
## bibtex 0.4.2.2 2020-01-02 [1] CRAN (R 3.6.1)
## Biobase * 2.44.0 2019-05-02 [1] Bioconductor
## BiocFileCache * 1.8.0 2019-05-02 [1] Bioconductor
## BiocGenerics * 0.30.0 2019-05-02 [1] Bioconductor
## BiocManager 1.30.10 2019-11-16 [2] CRAN (R 3.6.1)
## BiocParallel * 1.18.1 2019-08-06 [1] Bioconductor
## BiocStyle 2.12.0 2019-05-02 [1] Bioconductor
## biomaRt * 2.40.5 2019-10-01 [1] Bioconductor
## Biostrings 2.52.0 2019-05-02 [1] Bioconductor
## bit 4.0.4 2020-08-04 [1] CRAN (R 3.6.1)
## bit64 4.0.2 2020-07-30 [1] CRAN (R 3.6.1)
## bitops 1.0-6 2013-08-17 [2] CRAN (R 3.6.1)
## blob 1.2.1 2020-01-20 [1] CRAN (R 3.6.1)
## BSgenome 1.52.0 2019-05-02 [1] Bioconductor
## bumphunter 1.26.0 2019-05-02 [1] Bioconductor
## caTools 1.18.0 2020-01-17 [1] CRAN (R 3.6.1)
## checkmate 2.0.0 2020-02-06 [1] CRAN (R 3.6.1)
## cli 2.0.2 2020-02-28 [1] CRAN (R 3.6.1)
## cluster 2.1.0 2019-06-19 [2] CRAN (R 3.6.1)
## clusterProfiler * 3.12.0 2019-05-02 [1] Bioconductor
## codetools 0.2-16 2018-12-24 [2] CRAN (R 3.6.1)
## colorspace 1.4-1 2019-03-18 [2] CRAN (R 3.6.1)
## cowplot 1.0.0 2019-07-11 [1] CRAN (R 3.6.1)
## crayon 1.3.4 2017-09-16 [2] CRAN (R 3.6.1)
## crosstalk 1.1.0.1 2020-03-13 [1] CRAN (R 3.6.1)
## curl 4.3 2019-12-02 [1] CRAN (R 3.6.1)
## data.table * 1.13.0 2020-07-24 [1] CRAN (R 3.6.1)
## DBI 1.1.0 2019-12-15 [1] CRAN (R 3.6.1)
## dbplyr * 1.4.4 2020-05-27 [1] CRAN (R 3.6.1)
## DEFormats 1.12.0 2019-05-02 [1] Bioconductor
## DelayedArray * 0.10.0 2019-05-02 [1] Bioconductor
## derfinder 1.18.9 2019-09-20 [1] Bioconductor
## derfinderHelper 1.18.1 2019-05-22 [1] Bioconductor
## DESeq2 * 1.24.0 2019-05-02 [1] Bioconductor
## digest 0.6.25 2020-02-23 [1] CRAN (R 3.6.1)
## DO.db 2.9 2020-04-15 [1] Bioconductor
## doRNG 1.8.2 2020-01-27 [1] CRAN (R 3.6.1)
## DOSE 3.10.2 2019-06-24 [1] Bioconductor
## dplyr * 0.8.5 2020-03-07 [1] CRAN (R 3.6.1)
## DT * 0.15 2020-08-05 [1] CRAN (R 3.6.1)
## edgeR * 3.26.8 2019-09-01 [1] Bioconductor
## ellipsis 0.3.1 2020-05-15 [1] CRAN (R 3.6.1)
## enrichplot * 1.4.0 2019-05-02 [1] Bioconductor
## europepmc 0.4 2020-05-31 [1] CRAN (R 3.6.1)
## evaluate 0.14 2019-05-28 [2] CRAN (R 3.6.1)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.1)
## farver 2.0.3 2020-01-16 [1] CRAN (R 3.6.1)
## fastmap 1.0.1 2019-10-08 [2] CRAN (R 3.6.1)
## fastmatch 1.1-0 2017-01-28 [1] CRAN (R 3.6.1)
## fgsea 1.10.1 2019-08-21 [1] Bioconductor
## forcats * 0.5.0 2020-03-01 [1] CRAN (R 3.6.1)
## foreach 1.5.0 2020-03-30 [1] CRAN (R 3.6.1)
## foreign 0.8-71 2018-07-20 [2] CRAN (R 3.6.1)
## Formula 1.2-3 2018-05-03 [1] CRAN (R 3.6.1)
## genefilter 1.66.0 2019-05-02 [1] Bioconductor
## geneplotter 1.62.0 2019-05-02 [1] Bioconductor
## generics 0.0.2 2018-11-29 [1] CRAN (R 3.6.1)
## GenomeInfoDb * 1.20.0 2019-05-02 [1] Bioconductor
## GenomeInfoDbData 1.2.1 2020-05-25 [1] Bioconductor
## GenomicAlignments 1.20.1 2019-06-18 [1] Bioconductor
## GenomicFeatures 1.36.4 2019-07-10 [1] Bioconductor
## GenomicFiles 1.20.0 2019-05-02 [1] Bioconductor
## GenomicRanges * 1.36.1 2019-09-06 [1] Bioconductor
## ggforce 0.3.2 2020-06-23 [1] CRAN (R 3.6.1)
## ggplot2 * 3.3.2 2020-06-19 [1] CRAN (R 3.6.1)
## ggplotify 0.0.5 2020-03-12 [1] CRAN (R 3.6.1)
## ggraph 2.0.3 2020-05-20 [1] CRAN (R 3.6.1)
## ggrepel 0.8.2 2020-03-08 [1] CRAN (R 3.6.1)
## ggridges 0.5.2 2020-01-12 [1] CRAN (R 3.6.1)
## glue 1.4.1 2020-05-13 [1] CRAN (R 3.6.1)
## GO.db 3.8.2 2020-05-25 [1] Bioconductor
## GOSemSim 2.10.0 2019-05-02 [1] Bioconductor
## graphlayouts 0.7.0 2020-04-25 [1] CRAN (R 3.6.1)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 3.6.1)
## gridGraphics 0.5-0 2020-02-25 [1] CRAN (R 3.6.1)
## gtable 0.3.0 2019-03-25 [2] CRAN (R 3.6.1)
## highr 0.8 2019-03-20 [2] CRAN (R 3.6.1)
## Hmisc 4.4-1 2020-08-10 [1] CRAN (R 3.6.1)
## hms 0.5.3 2020-01-08 [1] CRAN (R 3.6.1)
## htmlTable 2.0.1 2020-07-05 [1] CRAN (R 3.6.1)
## htmltools 0.5.0 2020-06-16 [1] CRAN (R 3.6.1)
## htmlwidgets 1.5.1 2019-10-08 [2] CRAN (R 3.6.1)
## httpuv 1.5.2 2019-09-11 [2] CRAN (R 3.6.1)
## httr 1.4.1 2019-08-05 [2] CRAN (R 3.6.1)
## igraph 1.2.5 2020-03-19 [1] CRAN (R 3.6.1)
## interactiveDisplayBase 1.22.0 2019-05-02 [1] Bioconductor
## invgamma 1.1 2017-05-07 [1] CRAN (R 3.6.1)
## IRanges * 2.18.3 2019-09-24 [1] Bioconductor
## irlba 2.3.3 2019-02-05 [1] CRAN (R 3.6.1)
## iterators 1.0.12 2019-07-26 [1] CRAN (R 3.6.1)
## janeaustenr 0.1.5 2017-06-10 [1] CRAN (R 3.6.1)
## jpeg 0.1-8.1 2019-10-24 [1] CRAN (R 3.6.1)
## jsonlite 1.6.1 2020-02-02 [1] CRAN (R 3.6.1)
## kableExtra * 1.1.0 2019-03-16 [1] CRAN (R 3.6.1)
## knitcitations 1.0.10 2019-09-15 [1] CRAN (R 3.6.1)
## knitr * 1.29 2020-06-23 [1] CRAN (R 3.6.1)
## knitrBootstrap 1.0.2 2020-06-02 [1] Github (jimhester/knitrBootstrap@0f8d612)
## labeling 0.3 2014-08-23 [2] CRAN (R 3.6.1)
## later 1.0.0 2019-10-04 [2] CRAN (R 3.6.1)
## lattice * 0.20-38 2018-11-04 [2] CRAN (R 3.6.1)
## latticeExtra 0.6-29 2019-12-19 [1] CRAN (R 3.6.1)
## lazyeval 0.2.2 2019-03-15 [2] CRAN (R 3.6.1)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 3.6.1)
## limma * 3.40.6 2019-07-26 [1] Bioconductor
## locfit 1.5-9.4 2020-03-25 [1] CRAN (R 3.6.1)
## lubridate 1.7.9 2020-06-08 [1] CRAN (R 3.6.1)
## magrittr * 1.5 2014-11-22 [2] CRAN (R 3.6.1)
## markdown 1.1 2019-08-07 [2] CRAN (R 3.6.1)
## MASS 7.3-51.4 2019-03-31 [2] CRAN (R 3.6.1)
## Matrix 1.2-17 2019-03-22 [2] CRAN (R 3.6.1)
## matrixStats * 0.56.0 2020-03-13 [1] CRAN (R 3.6.1)
## memoise 1.1.0 2017-04-21 [2] CRAN (R 3.6.1)
## mime 0.9 2020-02-04 [1] CRAN (R 3.6.1)
## mixsqp 0.3-43 2020-05-14 [1] CRAN (R 3.6.1)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 3.6.1)
## nnet 7.3-12 2016-02-02 [2] CRAN (R 3.6.1)
## openxlsx * 4.1.5 2020-05-06 [1] CRAN (R 3.6.1)
## pheatmap * 1.0.12 2019-01-04 [1] CRAN (R 3.6.1)
## pillar 1.4.6 2020-07-10 [1] CRAN (R 3.6.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 3.6.1)
## plotly * 4.9.2.1 2020-04-04 [1] CRAN (R 3.6.1)
## plyr 1.8.6 2020-03-03 [1] CRAN (R 3.6.1)
## png 0.1-7 2013-12-03 [2] CRAN (R 3.6.1)
## polyclip 1.10-0 2019-03-14 [1] CRAN (R 3.6.1)
## preprocessCore 1.46.0 2019-05-02 [1] Bioconductor
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 3.6.1)
## progress 1.2.2 2019-05-16 [1] CRAN (R 3.6.1)
## promises 1.1.0 2019-10-04 [2] CRAN (R 3.6.1)
## purrr 0.3.4 2020-04-17 [1] CRAN (R 3.6.1)
## qvalue 2.16.0 2019-05-02 [1] Bioconductor
## R6 2.4.1 2019-11-12 [2] CRAN (R 3.6.1)
## rappdirs 0.3.1 2016-03-28 [2] CRAN (R 3.6.1)
## RColorBrewer * 1.1-2 2014-12-07 [2] CRAN (R 3.6.1)
## Rcpp 1.0.5 2020-07-06 [1] CRAN (R 3.6.1)
## RCurl 1.98-1.2 2020-04-18 [1] CRAN (R 3.6.1)
## readr 1.3.1 2018-12-21 [1] CRAN (R 3.6.1)
## RefManageR 1.2.12 2019-04-03 [1] CRAN (R 3.6.1)
## regionReport * 1.18.2 2019-06-18 [1] Bioconductor
## reshape2 1.4.4 2020-04-09 [1] CRAN (R 3.6.1)
## RJSONIO 1.3-1.4 2020-01-15 [1] CRAN (R 3.6.1)
## rlang 0.4.6 2020-05-02 [1] CRAN (R 3.6.1)
## RMariaDB * 1.0.10 2020-08-27 [1] CRAN (R 3.6.1)
## rmarkdown 2.3 2020-06-18 [1] CRAN (R 3.6.1)
## rngtools 1.5 2020-01-23 [1] CRAN (R 3.6.1)
## rpart 4.1-15 2019-04-12 [2] CRAN (R 3.6.1)
## Rsamtools 2.0.3 2019-10-10 [1] Bioconductor
## RSQLite 2.2.0 2020-01-07 [1] CRAN (R 3.6.1)
## rstudioapi 0.11 2020-02-07 [1] CRAN (R 3.6.1)
## rtracklayer 1.44.4 2019-09-06 [1] Bioconductor
## rvcheck 0.1.8 2020-03-01 [1] CRAN (R 3.6.1)
## rvest 0.3.5 2019-11-08 [2] CRAN (R 3.6.1)
## rWikiPathways * 1.4.1 2019-07-30 [1] Bioconductor
## S4Vectors * 0.22.1 2019-09-09 [1] Bioconductor
## scales 1.1.1 2020-05-11 [1] CRAN (R 3.6.1)
## sessioninfo * 1.1.1 2018-11-05 [2] CRAN (R 3.6.1)
## shiny 1.5.0 2020-06-23 [1] CRAN (R 3.6.1)
## SnowballC 0.7.0 2020-04-01 [1] CRAN (R 3.6.1)
## SQUAREM 2020.4 2020-08-26 [1] CRAN (R 3.6.1)
## stringi 1.4.6 2020-02-17 [1] CRAN (R 3.6.1)
## stringr 1.4.0 2019-02-10 [2] CRAN (R 3.6.1)
## SummarizedExperiment * 1.14.1 2019-07-31 [1] Bioconductor
## survival 3.2-3 2020-06-13 [1] CRAN (R 3.6.1)
## tibble 3.0.3 2020-07-10 [1] CRAN (R 3.6.1)
## tidygraph 1.2.0 2020-05-12 [1] CRAN (R 3.6.1)
## tidyr 1.1.2 2020-08-27 [1] CRAN (R 3.6.1)
## tidyselect 1.1.0 2020-05-11 [1] CRAN (R 3.6.1)
## tidytext * 0.2.5 2020-07-11 [1] CRAN (R 3.6.1)
## tokenizers 0.2.1 2018-03-29 [1] CRAN (R 3.6.1)
## triebeard 0.3.0 2016-08-04 [1] CRAN (R 3.6.1)
## truncnorm 1.0-8 2018-02-27 [1] CRAN (R 3.6.1)
## tweenr 1.0.1 2018-12-14 [1] CRAN (R 3.6.1)
## UpSetR 1.4.0 2019-05-22 [1] CRAN (R 3.6.1)
## urltools 1.7.3 2019-04-14 [1] CRAN (R 3.6.1)
## VariantAnnotation 1.30.1 2019-05-19 [1] Bioconductor
## vctrs 0.3.0 2020-05-11 [1] CRAN (R 3.6.1)
## viridis * 0.5.1 2018-03-29 [2] CRAN (R 3.6.1)
## viridisLite * 0.3.0 2018-02-01 [2] CRAN (R 3.6.1)
## vsn * 3.52.0 2019-05-02 [1] Bioconductor
## webshot 0.5.2 2019-11-22 [1] CRAN (R 3.6.1)
## withr 2.2.0 2020-04-20 [1] CRAN (R 3.6.1)
## xfun 0.16 2020-07-24 [1] CRAN (R 3.6.1)
## XML 3.99-0.3 2020-01-20 [1] CRAN (R 3.6.1)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 3.6.1)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 3.6.1)
## XVector 0.24.0 2019-05-02 [1] Bioconductor
## yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.1)
## zip 2.1.0 2020-08-10 [1] CRAN (R 3.6.1)
## zlibbioc 1.30.0 2019-05-02 [1] Bioconductor
##
## [1] /home/mpiage/R/x86_64-pc-linux-gnu-library/3.6
## [2] /modules/software/rlang/3.6.1/lib/R/library
Pandoc version used: 2.3.1.
# Save complete workspace
setwd(outputdir)
save.image(file = "Complete_analysis.RData")
# Add a flag to the start of the script with if statements in all code chunks to check if the flag is set.
# If flag is TRUE, run analysis; if flag is FALSE, SKIP analysis and continue here (just to modify plotting output).